1 OECD 국가

먼저 위키백과 OECD 문서(https://en.wikipedia.org/wiki/OECD)의 회원국 표에서 OECD 국가명, 가입일, 지리적 위치를 추출한다.

library(tidyverse)
library(rvest)

# Download the English Wikipedia OECD article and extract the one table
# addressed by the XPath below. html_table() returns a list with one data
# frame per matched node, so the first element is taken. The intermediates
# live inside local() so they do not linger in the global environment.
oecd_countries <- local({
  oecd_page <- read_html("https://en.wikipedia.org/wiki/OECD")
  table_nodes <- html_nodes(
    oecd_page,
    xpath = '//*[@id="mw-content-text"]/div[1]/table[5]'
  )
  parsed_tables <- html_table(table_nodes)
  parsed_tables[[1]]
})

# Reduce the scraped table to three columns:
#   content - country name
#   start   - membership date, parsed from the "day month year" text found
#             in the membership column
#   group   - geographic location
oecd_df <- oecd_countries %>%
  as_tibble() %>%
  janitor::clean_names() %>%
  transmute(
    content = country,
    start = lubridate::dmy(
      str_extract(membership_1, "[0-9]{1,2}\\s[a-zA-Z]+\\s[0-9]{4}")
    ),
    group = geographic_location
  )

oecd_df
# A tibble: 37 x 3
   content        start      group        
   <chr>          <date>     <chr>        
 1 Australia      1971-06-07 Oceania      
 2 Austria        1961-09-29 Europe       
 3 Belgium        1961-09-13 Europe       
 4 Canada         1961-04-10 North America
 5 Chile          2010-05-07 South America
 6 Colombia       2020-04-28 South America
 7 Czech Republic 1995-12-21 Europe       
 8 Denmark        1961-05-30 Europe       
 9 Estonia        2010-12-09 Europe       
10 Finland        1969-01-28 Europe       
# ... with 27 more rows

OECD 가입순서대로 대륙별로 타임라인을 잡아보자.

library(timevis)

# One row per distinct `group` value of oecd_df. count() is used only to
# collapse to the distinct groups; the tally itself is discarded. Both `id`
# and `content` carry the group name.
location <- oecd_df %>%
  count(group) %>%
  transmute(id = group, content = group)
  
# Draw the accession timeline, one lane per region (groups = location),
# without stacking items inside a lane.
#
# setSelection() looks items up by their id. oecd_df carries no `id` column,
# so the country name is used as the item id here; without it, no item has
# the id "South Korea" and the selection silently matches nothing.
oecd_df %>%
  mutate(id = content) %>%
  timevis(groups = location, options = list(stack = FALSE)) %>%
  setOptions(list(editable = TRUE)) %>%
  setSelection("South Korea") %>%
  fitWindow(list(animation = FALSE))

2 OECD 국가 비교

# Download the English Wikipedia OECD article again and extract the table
# addressed by the XPath below, returned as a tibble with snake_case column
# names. html_table() yields a list with one data frame per matched node;
# the first is used.
oecd_fact <- local({
  oecd_page <- read_html("https://en.wikipedia.org/wiki/OECD")
  table_nodes <- html_nodes(
    oecd_page,
    xpath = '//*[@id="mw-content-text"]/div[1]/table[6]'
  )
  raw_table <- html_table(table_nodes)[[1]]
  janitor::clean_names(as_tibble(raw_table))
})

# Clean the scraped comparison table:
#   1. give the 14 columns short names,
#   2. drop `rli`,
#   3. turn the literal "N/A" into NA,
#   4. overwrite three cells with hand-entered values,
#   5. remove rows whose country text matches Luxembourg, OECD or Country,
#   6. parse every remaining column except `country` as a number.
oecd_fact_df <- oecd_fact %>%
  set_names(c(
    "country", "area", "population", "gdp",
    "gdp_per_capita", "income_inequality",
    "hdi", "fsi", "rli", "cpi", "ief",
    "gpi", "wpfi", "di"
  )) %>%
  # rli is missing for 8 countries, so the column is not used
  select(-rli) %>%
  mutate(across(c(income_inequality, fsi, gpi), ~ na_if(.x, "N/A"))) %>%
  # Values are written as text because the columns are still character here;
  # they become numbers in the parse_number() step below.
  mutate(
    income_inequality = case_when(
      country == "New Zealand" ~ "33",
      country == "Poland" ~ "31.8",
      TRUE ~ income_inequality
    ),
    fsi = case_when(
      country == "Israel" ~ "75.1",
      TRUE ~ fsi
    )
  ) %>%
  filter(!str_detect(country, "Luxembourg|OECD|Country")) %>%
  mutate(across(-country, parse_number))


## Missing values per variable
# Counts the NA entries in every column of oecd_fact_df and lists the columns
# from most to fewest missing values. vapply() guarantees an integer vector
# whatever the input, unlike sapply().
# NOTE: the first output column is named `country` even though it holds the
# variable names; the name is kept so the result matches the printed output.
oecd_fact_df %>%
  vapply(function(column) sum(is.na(column)), integer(1)) %>%
  enframe(name = "country", value = "missings") %>%
  arrange(desc(missings))
# A tibble: 13 x 2
   country           missings
   <chr>                <int>
 1 country                  0
 2 area                     0
 3 population               0
 4 gdp                      0
 5 gdp_per_capita           0
 6 income_inequality        0
 7 hdi                      0
 8 fsi                      0
 9 cpi                      0
10 ief                      0
11 gpi                      0
12 wpfi                     0
13 di                       0
## Missing values per observation (country)
# Counts the NA cells in each row of oecd_fact_df and lists the countries
# from most to fewest missing values.
tibble(
  missings = unname(rowSums(is.na(oecd_fact_df))),
  country = oecd_fact_df$country
) %>%
  arrange(desc(missings))
# A tibble: 36 x 2
   missings country       
      <dbl> <chr>         
 1        0 Australia     
 2        0 Austria       
 3        0 Belgium       
 4        0 Canada        
 5        0 Chile         
 6        0 Colombia      
 7        0 Czech Republic
 8        0 Denmark       
 9        0 Estonia       
10        0 Finland       
# ... with 26 more rows

정제된 데이터를 경제 지표뿐만 아니라 아래의 다양한 지수를 기준으로 일별해보자.

  • HDI: Human Development Index
  • FSI: Fragile States Index
  • CPI: Corruption Perceptions Index
  • IEF: Index of Economic Freedom
  • GPI: Global Peace Index
  • WPFI: World Press Freedom Index (Reporters Without Borders)
  • DI: Democracy Index
# Interactive table of the cleaned data, ordered by GDP from largest to
# smallest. The four columns named below are displayed with no decimals and
# a thousands mark every three digits.
DT::formatRound(
  DT::datatable(arrange(oecd_fact_df, desc(gdp))),
  columns = c("area", "population", "gdp", "gdp_per_capita"),
  digits = 0,
  interval = 3
)

3 Digital Transformation

3.1 IMD Digital Competitiveness

The 2020 IMD World Digital Competitiveness Ranking 보고서

3.2 weforum Global Competitiveness

World Economic Forum, “Global Competitiveness Index 4.0”

보고서:

 

데이터 과학자 이광춘 저작

kwangchun.lee.7@gmail.com